library(tidyverse)
library(googlesheets4)
library(plotly)
library(ggplot2)
library(ggridges)
library(DT)
Data! Thanks Collier
Setting up each country to have a color
CountryColors = tibble(
Country = c("USA","France","Italy","Israel","Spain","Chile" )
,color = c("rgba(255,255,255,1)","rgba(255,0,255,1)","rgba(0,255,255,1)"
,"rgba(255,255,0,1)","rgba(255,0,0,1)","rgba(0,0,255,1)")
)
Cleaning the data:
# head(WineData)
# Remove empty rows at the bottom
WineData = WineData[is.na(WineData$Name)==FALSE,]
# Add average to the no vintage wine
MeanVintage = mean(na.omit(as.numeric(WineData$Vintage)))
## Warning in na.omit(as.numeric(WineData$Vintage)): NAs introduced by coercion
## Warning in na.omit(as.numeric(WineData$Vintage)): NAs introduced by coercion
# clean data: split country out, add vintage as all numeric
WineData = WineData %>%
mutate(
CountryClean = str_split(Country, ", ") # split region and country into list
,Vintage = as.vector(Vintage) #clean vintage from blended vector (dbl & chr)
,VintageClean = as.numeric(Vintage) # set data to numeric to call out NAs
) %>%
unnest_wider(CountryClean) %>% # split region and country list into two columns
rename("Region" = ...1
,"CountryClean" = ...2) %>% #rename columns from unnest
rowwise() %>%
mutate(
VintageClean = ifelse(is.na(VintageClean) == TRUE, MeanVintage, Vintage) #set unknown vintage to average
,CountryColor = CountryColors$color[match(CountryClean,CountryColors$Country)] #add color for plotting
,Size = (log(`Total Price`+1, exp(.1))+1)/2
)
## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion
## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion
Let’s take a look at the data after getting it cleaned
DT::datatable(WineData)
Bubble Chart!
Still have a few bits to edit - Size is off - Colors are off
plot_ly(
data = WineData
,x = ~VintageClean
,y = ~Price
,name = ~CountryClean
,color = ~CountryColor
,text = ~paste0("Brand: ", `Grower/bottler`
,"</br></br>Name: ", Name
,"</br>Country: ", CountryClean
,"</br>Vintage: ", Vintage
,"</br>Price: ", Price
,"</br>Bottles: ", `Bottles remaining`
,"</br>Total Value: ", `Total Price`
)
,hoverinfo = 'text'
,type = "scatter"
,mode = "markers"
,marker = list(
size = ~Size
,opacity = .7
)
)
Rdigeline histogram chart!
Fun way to compare the bottle prices by country
WineData %>%
mutate(text = fct_reorder(CountryClean,Price)) %>%
ggplot(
aes(y = text, x = Price, fill = text)
) +
geom_density_ridges(alpha = .6, stat = "binline", bins = 20) +
ggridges::theme_ridges() +
theme(
legend.position="none",
panel.spacing = unit(6, "lines"),
strip.text.x = element_text(size = 8)
) +
xlab("Price ($)") +
ylab("")